import requests
import pandas as pd
from config import schema, db
from sqlalchemy import create_engine



def _get_ged_slice(next_page_url, token=None):
    """
    Fetch a page (a slice from GED from a known next page URL;
    Next page URL is generated by the UCDP GED API upon a successful paged request
    Private method. Should not be called directly.
    """
    headers = {'x-ucdp-access-token': token}
    r = requests.get(next_page_url, headers=headers)
    output = r.json()
    next_page_url = output['NextPageUrl'] if output['NextPageUrl'] != '' else None
    ged = pd.DataFrame(output['Result'])
    page_count = output['TotalPages']
    return next_page_url, ged, page_count


def getGED(version_number='5.9.99', optional_extra_args=None, token='48dda3460c347f3b'):
    """
    Retrieves a UCDP GED table from the UCDP RESTFUL API
    :param version_number: A UCDP GED or UCDP GED Candidate valid version number. E.g. 18.1, 18.0.9 etc.
    :param optional_extra_args: A UCDP GED filtering string. Check ucdp.uu.se for options.
    :param token A UCDP GED API token. Will work with None subject to access limitations for public access (approx. 100/day)
    Contact UCDP
    :return: A full UCDP GED version (irrespective of number of battle events) as a Pandas df.
    """

    print ("Getting GED...\n")
    cur_page = 1

    if optional_extra_args is not None:
        next_page_url = 'http://ucdpapi.pcr.uu.se/api/gedevents/' + version_number + '?pagesize=1000&' + \
                        str(optional_extra_args)
    else:
        next_page_url = 'http://ucdpapi.pcr.uu.se/api/gedevents/'+version_number+'?pagesize=1000'

    ged = pd.DataFrame()

    while next_page_url:
        next_page_url, ged_slice, total_pages = _get_ged_slice(next_page_url=next_page_url,
                                                 token=token)
        ged = ged.append(ged_slice, ignore_index=True)
        print(cur_page,'/',total_pages,'pages loaded')
        cur_page+=1

    print(f"\n Table {version_number} fetched :::" + str(ged.shape[0]) + "\n")
    ged['version'] = version_number
    return (ged)


def getFullCand(base_version = '18.0.',
                versions = list(range(1,13))
):
    """
    Collates multiple UCDP monthly GED candidates table.
    :param base_version: Give the base version, e.g. 18.0. for 2018, 19.0. for 2019, 19.9 for internal 19.9
    :param versions: A list of monthly editions you want collated. list(range(1,13)) gives a whole year.
    Fails on non-existing editions.
    :return: A collated set of GED candidates table. Note that it isn't deduplicated.
    """
    full_ged = pd.DataFrame()
    for version in versions:
        version = base_version + str(version)
        full_ged = pd.concat([
            full_ged, getGED(version)
        ], ignore_index=True)
    return full_ged

def SQLDeduplicate(table="cand_data"):
    query=f"""
    with a as (
       SELECT *,
              rank() OVER (PARTITION BY id ORDER BY version ASC) as revised_id
       FROM {schema}.{table}
     )
     DELETE FROM {schema}.{table} WHERE index IN (SELECT index FROM a WHERE revised_id>1)
     """
    engine = create_engine(db)
    with engine.connect() as con:
        trans = con.begin()
        result = con.execute(query)
        print(f"Found and removed {result.rowcount} duplicates. Oldest (initial) commits kept")
        trans.commit()



#print("Fetching Candidate data...")

#full_ged_cand = getFullCand()

print("Fetching 2019 eval data...")
#eval_ged_cand = getFullCand(base_version = '19.0.',versions = list(range(1,13)))

print("Fetching 2019 eval data...")
#eval_ged_cand2020 = getFullCand(base_version = '20.0.',versions = list(range(1,2)))

#print("Fetching Revised data...")
#full_ged_real = getGED('19.1')

#print("Fetching old data...")
#full_ged_old = getGED('18.1')

engine = create_engine(db)
#print(f"Writing to db in schema {schema} as table cand_data")
#full_ged_cand.to_sql(name='cand_data', schema=schema, con=engine, if_exists='replace')

print(f"Writing to db in schema {schema} as table cand_eval")
#eval_ged_cand.to_sql(name='cand19_full', schema=schema, con=engine, if_exists='replace')
#eval_ged_cand2020.to_sql(name='cand20_full', schema=schema, con=engine, if_exists='replace')


#print(f"Writing to db in schema {schema} as table ged191_full")
#full_ged_real.to_sql(name='ged191', schema=schema, con=engine, if_exists='replace')

#print(f"Writing to db in schema {schema} as table ged181")
#full_ged_old.to_sql(name='ged181', schema=schema, con=engine, if_exists='replace')

#SQLDeduplicate("cand_data")
#SQLDeduplicate("cand_eval")

SQLDeduplicate(table="cand19_full")